import scrapy
import json


class FengtianSpider(scrapy.Spider):
    """Crawl che168.com listings under ``/china/fengtian/``.

    ``parse`` walks the paginated listing, extracts the summary fields of
    every car card and schedules a request for the car's detail page.
    ``parse_detail_page`` adds colour and seller information to the summary
    and emits the finished record.
    """

    name = 'fengtian'
    allowed_domains = ['www.che168.com']
    start_urls = ['https://www.che168.com/china/fengtian/']

    def parse(self, response):
        """Parse one listing page.

        Yields one detail-page request per car card that has a usable link,
        followed by a request for the next listing page when the page
        contains a "next" anchor with an href.
        """
        cards = response.xpath('//li[@name="lazyloadcpc"]')

        for card in cards:
            href = card.xpath('./a[@class="carinfo"]/@href').re_first(r'.*html')
            if not href:
                # urljoin(None) returns the listing URL itself, which would
                # send the listing page to the detail callback; skip instead.
                self.logger.debug('Skipping car card without a detail link on %s', response.url)
                continue
            link = response.urljoin(href)

            model = card.xpath('.//h4[@class="card-name"]/text()').get()
            price = card.xpath('.//span[@class="pirce"]/em/text()').get()
            desc = card.xpath('.//p[@class="cards-unit"]/text()')
            # Example of the description text being matched below:
            # 5万公里／2018-05／深圳／6年会员商家
            dist = desc.re_first(r'.*公里')
            data = desc.re_first(r'\d{4}-\d{2}')
            city = desc.re_first(r'.*公里／.*／(.*)／')

            car_info = dict(
                model=model,  # model name
                dist=dist,  # mileage
                data=data,  # registration date (YYYY-MM)
                city=city,  # location
                price=price,  # price
                link=link,  # detail page URL
            )
            # Nest the record under its own key so it does not get mixed
            # with the bookkeeping entries Scrapy stores in request.meta.
            yield scrapy.Request(
                link,
                callback=self.parse_detail_page,
                meta={'car_info': car_info},
            )

        # Join only after confirming an href exists: urljoin(None) returns
        # the current URL, which is always truthy.
        next_href = response.xpath('//a[@class="page-item-next"]/@href').get()
        if next_href:
            yield scrapy.Request(response.urljoin(next_href), callback=self.parse)

    def parse_detail_page(self, response):
        """Parse a car detail page and emit the completed record.

        Reads the summary dict stored under ``meta['car_info']`` by
        ``parse``, adds ``color``, ``seller_name`` and ``seller_address``
        (each ``None`` when the corresponding markup is not found), prints
        the record as JSON and yields it as a scraped item.
        """
        # Copy so the dict attached to the request is left untouched.
        car_info = dict(response.meta['car_info'])

        detail = response.xpath('//div[@class="all-basic-content fn-clear"]')
        # The colour is captured with a regex over the block's HTML: it is
        # the text between the label span and the closing </li>.
        color = detail.re_first('<span class="item-name">车身颜色</span>(.*?)</li>')

        seller = response.xpath('//div[@class="protarit-list"]')
        seller_name = seller.re_first('<span class="manger-name">(.*?)</span>')
        seller_address = seller.re_first('<div class="protarit-adress">(.*?)</div>')

        car_info['color'] = color  # body colour
        car_info['seller_name'] = seller_name  # seller
        car_info['seller_address'] = seller_address  # seller address
        print(json.dumps(car_info, ensure_ascii=False))
        # Yield the record so item pipelines and feed exports receive it.
        yield car_info
